# Import the regular expression library - re
import re


# Sample sentence, built with implicit literal concatenation; the two pieces
# join into one string.
text = (
    "Search is the target. let's search the Search using "
    "search which can be accessed by re's search"
)

# re.search() scans the string and returns a match object for the first
# place the pattern occurs, or None when it occurs nowhere.
search_result = re.search('Search', text)

if search_result is not None:
    print("re's search was successfull in searching the Search!")
# print(search_result) would display the match object itself.

re's search was successfull in searching the Search!


# Searching for a plain literal: 'Search' contains no regex metacharacters,
# so only lines holding exactly that (case-sensitive) text are reported.
text = [
    'Regular expressions can be used for advanced text pattern matching, extraction, and/or search-and-replace functionality.',
    'Shortened as regex.',
    'Regex is a sequence of characters that define a search pattern.',
    'Regular expressions are used in :',
    'Search engines',
    'Search and replace dialogs of word processors and text editors',
    'Text processing utilities such as sed and AWK',
    'Lexical analysis Regular',
    'Registr',
]

# Compile once, then reuse the compiled pattern for every line.
literal_search = re.compile('Search')
for line in text:
    if literal_search.search(line) is not None:
        print('Line -', line)

Line - Search engines
Line - Search and replace dialogs of word processors and text editors


# Passing the re.I (IGNORECASE) flag makes the search case-insensitive, so
# lines containing 'search' are reported as well as those with 'Search'.
for line in text:
    found = re.search('Search', line, flags=re.I)
    if found:
        print('Line -', line)

Line - Regular expressions can be used for advanced text pattern matching, extraction, and/or search-and-replace functionality.
Line - Regex is a sequence of characters that define a search pattern.
Line - Search engines
Line - Search and replace dialogs of word processors and text editors


# Regex-free check: print each line that begins with 'Reg' and whose 7th
# character (index 6) is 'r'.
for line in text:
    # line[6:7] is '' for any line shorter than 7 characters, so the
    # comparison simply fails instead of raising IndexError.  A length test
    # of `>= 6` is not sufficient here: index 6 only exists once the line
    # has at least 7 characters.
    if line.startswith('Reg') and line[6:7] == 'r':
        print(line)

Regular expressions can be used for advanced text pattern matching, extraction, and/or search-and-replace functionality.
Regular expressions are used in :
Registr


# Print every line that starts with 'Reg', followed by any three characters,
# followed by 'r'.  '^' anchors the pattern at the start of the line and each
# '.' stands for one arbitrary character; the end is not anchored, so the
# line may continue after that 'r'.
for line in text:
    if re.search('^Reg...r', line) is None:
        continue
    print(line)

Regular expressions can be used for advanced text pattern matching, extraction, and/or search-and-replace functionality.
Regular expressions are used in :
Registr


# re.match() tries the pattern only at the beginning of the string; '.'
# accepts any single character, so '.at' matches 'mat'.
outcome = "Matched" if re.match(".at", "mat") else "Not matched"
print(outcome)

Matched


# '$' requires the match to finish at the end of the string: one arbitrary
# character followed by 'ent' and then nothing else, which 'tent' satisfies.
print("Matched" if re.match(".ent$", "tent") is not None else "Not Matched")

Matched


# Same pattern against 'tents': the trailing 's' sits between 'ent' and the
# end of the string, so the '$' anchor cannot be satisfied.
if re.match(".ent$", "tents") is None:
    print("Not Matched")
else:
    print("Matched")

Not Matched


# Validate a registration number read from standard input: two digits (the
# first one non-zero), then three letters, then four digits.  Every position
# is written out as its own character class.
register = input()
is_valid = re.match("^[1-9][0-9][a-zA-Z][a-zA-Z][a-zA-Z][0-9][0-9][0-9][0-9]$", register)
print("Valid" if is_valid else "Invalid")

20bce0000
Valid


# The same expression can also be written more compactly with repetition
# counts: {3} means "exactly three of the preceding class" and {4} means
# "exactly four".

register = input()
if re.match("^[1-9][0-9][a-zA-Z]{3}[0-9]{4}$", register) is None:
    print("Invalid")
else:
    print("Valid")

20bce0000
Valid


# '.*' means "any character, zero or more times", so it can match at the
# start of every string -- including an empty one -- and the message below
# is printed for any input.
my_string = input("Enter your input ")
accepted = re.match('.*', my_string)
if accepted is not None:
    print("Matched the input string")

Enter your input Bhargavi
Matched the input string


def is_integer(value):
    """Return True if *value* is the decimal text of an integer.

    Accepted forms are a single '0', or an optional leading '-' followed by
    a non-zero digit and any number of further digits.  So '0', '7' and
    '-42' pass, while '', '-', '007', '+5', '1.5' and '12a' do not.
    """
    # Raw string keeps regex syntax out of Python's own escape processing.
    # '-' is only special inside a character class, so it needs no backslash.
    # fullmatch() anchors both ends, so '^'/'$' are unnecessary and a
    # trailing newline is not accepted the way '$' would allow.
    return re.fullmatch(r"0|-?[1-9][0-9]*", value) is not None


# In a notebook cell or a script __name__ is '__main__', so the prompt still
# runs there; the guard only keeps an import from blocking on input().
if __name__ == "__main__":
    number = input('Enter input ')
    print('Integer' if is_integer(number) else 'Not an integer')

Enter input 1345
Integer


# Pull every e-mail-like token out of a sentence: one or more non-whitespace
# characters on each side of an '@'.  '@2PM' is not returned because nothing
# but a space precedes its '@'.
text = 'mail from abc@gmail.com to xyz@vit.ac.in about meeting @2PM'
# Raw string: in an ordinary literal '\S' is an invalid escape sequence,
# which Python warns about and intends to reject in a future version.
mailid = re.findall(r'\S+@\S+', text)
print(mailid)

['abc@gmail.com', 'xyz@vit.ac.in']


# Search the customer details for contact numbers.
# The parentheses form a capture group, so findall() returns only the ten
# digits rather than the whole matched text.
# Note: the pattern is wrapped in greedy '.*', so a single match swallows the
# entire record and only the LAST contact number on a line is captured.

customer_details = [
    'name1 name1@abc.com contact number: 1234567891 someotherinfo',
    'This does not have contact info',
    'name2 abc@xyx.co.in contact number: 9940693362 additional info',
    'my_name aaa123@abc.com contact number: 8765432567 another number contact number: 2345678912',
]

greedy_pattern = re.compile('.*contact number: ([1-9][0-9]{9}).*')
for item in customer_details:
    contact_numbers = greedy_pattern.findall(item)

    # Records without a contact number yield an empty list; skip printing.
    if contact_numbers:
        print(contact_numbers)

['1234567891']
['9940693362']
['2345678912']


# Search for and extract all the contact numbers from the customer details.
# The parentheses form a capture group, so findall() returns only the ten
# digits; with no surrounding '.*' every occurrence on a line is found.

customer_details = [
    'name1 name1@abc.com contact number: 1234567891 someotherinfo',
    'This does not have contact info',
    'name2 abc@xyx.co.in contact number: 9940693362 additional info',
    'my_name aaa123@abc.com contact number: 8765432567 another number contact number: 2345678912',
]

number_pattern = re.compile('contact number: ([1-9][0-9]{9})')
for item in customer_details:
    contact_numbers = number_pattern.findall(item)

    # Records without a contact number yield an empty list; skip printing.
    if contact_numbers:
        print(contact_numbers)

['1234567891']
['9940693362']
['8765432567', '2345678912']

Symbol	Description
*literal*	Match literal string value *literal*
*re1* | *re2*	Match *re1* or *re2*
?	matches zero or one of the preceding group.
*	matches zero or more of the preceding group.
+	matches one or more of the preceding group.
{n}	matches exactly n of the preceding group.
{n,}	matches n or more of the preceding group.
{,m}	matches 0 to m of the preceding group.
{n,m}	matches at least n and at most m of the preceding group.
^spam	means the string must begin with spam.
spam$	means the string must end with spam.
Period (.)	matches any character, except newline characters.
\d, \w, and \s	match a digit, word, or space character, respectively.
\D, \W, and \S	match anything except a digit, word, or space character, respectively.
[abc]	matches any character between the brackets (such as a, b, or c).
[^abc]	matches any character that isn’t between the brackets.

Programming for Data Science

rEgular eXPRESSions

Let's do some programming

Character matching

Time for rules!

Write a program to check if the given input is an integer

Rules